
###################################################################################
# File to convert ARC events data to long format and merge with ARC Organizations #
###################################################################################

# Load libraries
library(tidyverse)
library(lubridate)

# Set the working directory to this script's folder so the relative file names
# used below resolve. The RStudio API is only reachable from inside a running
# RStudio session, and an unsaved document reports an empty path; in either
# case the current working directory is left unchanged instead of erroring.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}

# Read the ARC events table
ARC_Events <- read_rds("ARC_2_0_Events.rds")

# Read the ARC organizations table and remove the columns that are either not
# needed for the merge or are aggregations derived from the events data
ARC_Organizations <- dplyr::select(
  read_rds("ARC_2_0_Organizations.rds"),
  -ccode, -year,
  -n_scad, -n_scad_maximalist, -n_scad_nonviolent, -n_scad_violent, -n_ged,
  -scad_id, -ged_id
)

# Read the supplementary fronts table and prepare it for the merge, in one pass:
#   * front_ties is renamed to front_constituent_orgs
#   * columns that duplicate ARC_Organizations are removed
#   * front_year = 1 flags every row that comes from this table
#   * ccode is stored as character
Supplementary_Fronts <- read_rds("ARC_2_0_Suppl_Fronts.rds") %>%
  rename(front_constituent_orgs = front_ties) %>%
  dplyr::select(
    -country_name, -country_abbr, -org_name, -org_type, -unique_arc_id
  ) %>%
  mutate(
    front_year = 1,
    ccode = as.character(ccode)
  )

# Convert the ARC events data to long format: org_id holds a comma-separated
# list of organization ids, so split it into one row per event-organization
# pair and trim the whitespace left around each id.
ARC_Events_long <- ARC_Events %>%
  separate_rows(org_id, sep = ",") %>%
  mutate(org_id = str_trim(org_id))

# Derive integer start and end years from the event dates. They bound the
# event_year expansion below and are dropped again afterwards.
ARC_Events_long <- ARC_Events_long %>%
  mutate(
    year_start = as.integer(year(as.Date(event_startdate))),
    year_end = as.integer(year(as.Date(event_enddate)))
  )

# Event "scad.6200671" is the known case with a missing end year; set it to
# 2016 by hand (integer literal keeps the column type integer).
ARC_Events_long <- ARC_Events_long %>%
  mutate(
    year_end = if_else(
      is.na(year_end) & eventid == "scad.6200671",
      2016L,
      year_end
    )
  )

# seq() cannot expand a missing bound, so fail early with the offending event
# ids instead of a cryptic error from inside the expansion step.
events_missing_year <- ARC_Events_long %>%
  filter(is.na(year_start) | is.na(year_end)) %>%
  pull(eventid) %>%
  unique()
if (length(events_missing_year) > 0) {
  stop(
    "Cannot expand events with a missing start or end year: ",
    paste(events_missing_year, collapse = ", "),
    call. = FALSE
  )
}

# Expand each row to one row per calendar year between start and end year
# (inclusive), keeping all other information, and store ccode as character.
# NOTE(review): if an end year precedes its start year, seq() counts downward
# and the years come out in descending order - confirm no such rows exist.
ARC_Events_long <- ARC_Events_long %>%
  mutate(event_year = map2(year_start, year_end, seq)) %>%
  unnest(cols = c(event_year)) %>%
  select(-year_start, -year_end) %>%
  mutate(ccode = as.character(ccode))

# Build the merged table in two left joins, so every row of ARC_Events_long is
# kept whether or not it finds a match:
#   1. organization attributes, matched on org_id
#   2. supplementary fronts data, matched on ccode, org_id and the calendar
#      year (event_year on the events side, year on the fronts side)
# NOTE(review): a left join repeats an event row once per matching row, so this
# presumes one row per org_id in ARC_Organizations and one row per
# ccode/org_id/year in Supplementary_Fronts - confirm against the data.
ARC_Events_merged <- ARC_Events_long %>%
  left_join(ARC_Organizations, by = "org_id") %>%
  left_join(
    Supplementary_Fronts,
    by = c("ccode", "org_id", "event_year" = "year")
  )

# Save the merged event-organization-year table in both rds and csv form
write_rds(ARC_Events_merged, "ARC_2_0_event_org_year_merged.rds")
write_csv(ARC_Events_merged, "ARC_2_0_event_org_year_merged.csv")

# Also save csv copies of the three source tables. Organizations and
# Supplementary Fronts were modified above for the merge, so both are read
# again from their rds files before being written out.
ARC_Organizations <- read_rds("ARC_2_0_Organizations.rds")
Supplementary_Fronts <- read_rds("ARC_2_0_Suppl_Fronts.rds")

write_csv(ARC_Events, "ARC_2_0_Events.csv")
write_csv(ARC_Organizations, "ARC_2_0_Organizations.csv")
write_csv(Supplementary_Fronts, "ARC_2_0_Suppl_Fronts.csv")

#################
# End of script #
#################

